{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "predict.ipynb",
      "provenance": [],
      "authorship_tag": "ABX9TyPDz0l3An7jy4t7s2rFFKuV",
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/github/vitaldb/examples/blob/master/predict.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "EVNIp6lwk5DD"
      },
      "source": [
        "# Prediction Model for Hepatocellular Carcinoma in Hepatitis B Patients"
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "## Upgrade XGBoost library"
      ],
      "metadata": {
        "id": "VOZ6R-8VNmCO"
      }
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "cHEggysIlr04",
        "outputId": "9e9d5be3-af99-47f8-ce13-a6601d9f3bd4",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 277
        }
      },
      "source": [
        "# Upgrade in a single step. An uninstall followed by a plain install can simply\n",
        "# restore the same cached version; --upgrade asks pip for the newest release.\n",
        "# %pip (rather than !pip) installs into the environment of the running kernel.\n",
        "%pip install --upgrade xgboost"
      ],
      "execution_count": 6,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Found existing installation: xgboost 1.6.2\n",
            "Uninstalling xgboost-1.6.2:\n",
            "  Successfully uninstalled xgboost-1.6.2\n",
            "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
            "Collecting xgboost\n",
            "  Using cached xgboost-1.6.2-py3-none-manylinux2014_x86_64.whl (255.9 MB)\n",
            "Requirement already satisfied: numpy in /usr/local/lib/python3.7/dist-packages (from xgboost) (1.21.6)\n",
            "Requirement already satisfied: scipy in /usr/local/lib/python3.7/dist-packages (from xgboost) (1.7.3)\n",
            "Installing collected packages: xgboost\n",
            "Successfully installed xgboost-1.6.2\n"
          ]
        },
        {
          "output_type": "display_data",
          "data": {
            "application/vnd.colab-display-data+json": {
              "pip_warning": {
                "packages": [
                  "xgboost"
                ]
              }
            }
          },
          "metadata": {}
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "## Download model and sample files"
      ],
      "metadata": {
        "id": "O5PmN3NMNrXY"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# -O pins the output file name, so re-running this cell overwrites the files\n",
        "# instead of saving numbered duplicates (model.json.1, sample.csv.1) that the\n",
        "# cells below would never read. -nv keeps the log to one line per download.\n",
        "!wget -nv -O model.json https://github.com/vitaldb/hbvhcc/raw/main/model.json\n",
        "!wget -nv -O sample.csv https://github.com/vitaldb/hbvhcc/raw/main/sample.csv"
      ],
      "metadata": {
        "id": "4PuRpTXONvY_",
        "outputId": "6369f5bf-3c86-4994-fc30-aea6681e63df",
        "colab": {
          "base_uri": "https://localhost:8080/"
        }
      },
      "execution_count": 12,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "--2022-11-24 15:29:46--  https://github.com/vitaldb/hbvhcc/raw/main/model.json\n",
            "Resolving github.com (github.com)... 20.27.177.113\n",
            "Connecting to github.com (github.com)|20.27.177.113|:443... connected.\n",
            "HTTP request sent, awaiting response... 301 Moved Permanently\n",
            "Location: https://github.com/vitaldb/planb/raw/main/model.json [following]\n",
            "--2022-11-24 15:29:47--  https://github.com/vitaldb/planb/raw/main/model.json\n",
            "Reusing existing connection to github.com:443.\n",
            "HTTP request sent, awaiting response... 302 Found\n",
            "Location: https://raw.githubusercontent.com/vitaldb/planb/main/model.json [following]\n",
            "--2022-11-24 15:29:47--  https://raw.githubusercontent.com/vitaldb/planb/main/model.json\n",
            "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.109.133, 185.199.110.133, 185.199.111.133, ...\n",
            "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.109.133|:443... connected.\n",
            "HTTP request sent, awaiting response... 200 OK\n",
            "Length: 60814 (59K) [text/plain]\n",
            "Saving to: ‘model.json’\n",
            "\n",
            "model.json          100%[===================>]  59.39K  --.-KB/s    in 0.01s   \n",
            "\n",
            "2022-11-24 15:29:47 (4.04 MB/s) - ‘model.json’ saved [60814/60814]\n",
            "\n",
            "--2022-11-24 15:29:47--  https://github.com/vitaldb/hbvhcc/raw/main/sample.csv\n",
            "Resolving github.com (github.com)... 20.27.177.113\n",
            "Connecting to github.com (github.com)|20.27.177.113|:443... connected.\n",
            "HTTP request sent, awaiting response... 301 Moved Permanently\n",
            "Location: https://github.com/vitaldb/planb/raw/main/sample.csv [following]\n",
            "--2022-11-24 15:29:48--  https://github.com/vitaldb/planb/raw/main/sample.csv\n",
            "Reusing existing connection to github.com:443.\n",
            "HTTP request sent, awaiting response... 302 Found\n",
            "Location: https://raw.githubusercontent.com/vitaldb/planb/main/sample.csv [following]\n",
            "--2022-11-24 15:29:48--  https://raw.githubusercontent.com/vitaldb/planb/main/sample.csv\n",
            "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133, 185.199.109.133, 185.199.108.133, ...\n",
            "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.111.133|:443... connected.\n",
            "HTTP request sent, awaiting response... 200 OK\n",
            "Length: 425 [text/plain]\n",
            "Saving to: ‘sample.csv’\n",
            "\n",
            "sample.csv          100%[===================>]     425  --.-KB/s    in 0s      \n",
            "\n",
            "2022-11-24 15:29:48 (27.1 MB/s) - ‘sample.csv’ saved [425/425]\n",
            "\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "## Simple example for single patient\n",
        "\n"
      ],
      "metadata": {
        "id": "UtZFeTVjNCD3"
      }
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "c8eh2Sx6k21M",
        "outputId": "a81f7297-1bf3-4c8b-8675-6e4defdc1cb8",
        "colab": {
          "base_uri": "https://localhost:8080/"
        }
      },
      "source": [
        "import xgboost as xgb\n",
        "import numpy as np\n",
        "\n",
        "# Example inputs for one patient; edit these values to score another case.\n",
        "male = 0\n",
        "age = 48\n",
        "e_t = 1\n",
        "lc = 1  # use None when the value is missing\n",
        "plt = 172\n",
        "alb = 4.1\n",
        "tbil = 1.1\n",
        "alt = 122\n",
        "dna = 74279943\n",
        "hbeag = 1\n",
        "\n",
        "# Load the booster from the downloaded model file.\n",
        "model = xgb.Booster()\n",
        "model.load_model('model.json')\n",
        "\n",
        "# Pack the inputs, in this order, into a single-row float matrix for the model.\n",
        "features = [male, age, e_t, lc, plt, alb, tbil, alt, dna, hbeag]\n",
        "x = xgb.DMatrix(np.array(features).astype(float).reshape(1, -1))\n",
        "\n",
        "# predict() returns one value per row; take the only one.\n",
        "y = model.predict(x)[0]\n",
        "\n",
        "print('score = {:.3f}'.format(y))"
      ],
      "execution_count": 13,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "score = 0.196\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "## Example code using CSV file for multiple patients "
      ],
      "metadata": {
        "id": "MQqfAf4-NPOr"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "import xgboost as xgb\n",
        "import numpy as np\n",
        "import pandas as pd\n",
        "\n",
        "# Feature columns, in the order they are passed to the model.\n",
        "feature_cols = ['male', 'age', 'e_t', 'lc', 'plt', 'alb', 'tbil', 'alt', 'dna', 'hbeag']\n",
        "\n",
        "# Load the booster from the downloaded model file.\n",
        "model = xgb.Booster()\n",
        "model.load_model('model.json')\n",
        "\n",
        "# Drop rows in which every column is empty (for example a trailing line of bare\n",
        "# commas), so that no prediction is produced for a row that holds no patient data.\n",
        "# reset_index keeps the row labels contiguous after the drop.\n",
        "df = pd.read_csv('sample.csv').dropna(how='all').reset_index(drop=True)\n",
        "\n",
        "# Score every remaining row and store the result next to the inputs.\n",
        "x = df[feature_cols].astype(float).values\n",
        "df['pred'] = model.predict(xgb.DMatrix(x))\n",
        "\n",
        "# Save the inputs together with their predictions, then show them.\n",
        "df.to_csv('prediction.csv')\n",
        "print(df)"
      ],
      "metadata": {
        "id": "iEffyN41OqSe",
        "outputId": "b4bef14b-a804-4caf-ed72-e6832427e1aa",
        "colab": {
          "base_uri": "https://localhost:8080/"
        }
      },
      "execution_count": 14,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "      no  male   age  e_t   lc    plt  alb  tbil    alt          dna  hbeag  \\\n",
            "0    1.0   1.0  33.0  0.0  0.0  167.0  4.4   1.1   84.0  170000000.0    1.0   \n",
            "1    2.0   0.0  56.0  0.0  0.0  168.0  3.6   0.7  223.0    7990000.0    1.0   \n",
            "2    3.0   0.0  62.0  1.0  1.0  164.0  4.3   0.5  104.0       6080.0    0.0   \n",
            "3    4.0   1.0  46.0  1.0  0.0  317.0  4.3   0.7   66.0      45100.0    0.0   \n",
            "4    5.0   1.0  54.0  1.0  1.0  122.0  4.6   1.3  147.0   12500000.0    0.0   \n",
            "5    6.0   1.0  30.0  0.0  0.0  198.0  4.4   1.4  955.0  170000000.0    1.0   \n",
            "6    7.0   1.0  48.0  0.0  1.0  111.0  4.2   1.3   95.0     262000.0    0.0   \n",
            "7    8.0   0.0  58.0  1.0  1.0  138.0  4.3   0.7   45.0      17100.0    0.0   \n",
            "8    9.0   0.0  50.0  0.0  1.0  154.0  4.2   1.0   61.0     790000.0    0.0   \n",
            "9   10.0   0.0  65.0  0.0  0.0  187.0  4.6   0.6   23.0          0.0    0.0   \n",
            "10   NaN   NaN   NaN  NaN  NaN    NaN  NaN   NaN    NaN          NaN    NaN   \n",
            "\n",
            "        pred  \n",
            "0   0.095564  \n",
            "1   0.163563  \n",
            "2   0.286515  \n",
            "3   0.095078  \n",
            "4   0.412359  \n",
            "5   0.057932  \n",
            "6   0.630031  \n",
            "7   0.355294  \n",
            "8   0.450027  \n",
            "9   0.201603  \n",
            "10  0.339060  \n"
          ]
        }
      ]
    }
  ]
}